file_tag = sprintf("%s_%s", cell_type_name, graph_weight)
assayed_genes = scan(sprintf("output/gene_list_%s.txt", file_tag),
what = character(), sep="\n")
gene_sets = scan(sprintf("output/name_s_%s.txt", file_tag),
what = character(), sep="\n")
gene_sets = sapply(gene_sets, strsplit, split=",")
n_genes = sapply(gene_sets, length)
names(n_genes) = NULL
summary(n_genes)## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 7.00 22.00 26.00 24.57 28.25 30.00
## [1] 40
## [1] 7 14 15 17 19 21 21 21 21 22 22 23 23 23 24 25 25 26 26 26 26 26 26 26 27
## [26] 27 27 28 28 28 29 29 29 29 29 29 29 30 30 30
bioMart.All the gene symbols that can be found in bioMart are
consistent with what we have. So no need to run it.
ensembl = useMart("ensembl", dataset = "hsapiens_gene_ensembl")
gene_BM = getBM(attributes = c("hgnc_symbol", "external_gene_name"),
filters = "external_gene_name",
values = assayed_genes,
mart = ensembl)
length(assayed_genes)
dim(gene_BM)
gene_BM[1:2,]
table(assayed_genes %in% gene_BM$external_gene_name)
t1 = table(gene_BM$external_gene_name)
dup = names(t1)[t1 > 1]
gene_BM[gene_BM$external_gene_name %in% dup,]
table(gene_BM$hgnc_symbol == gene_BM$external_gene_name)
w2kp = which(gene_BM$hgnc_symbol != gene_BM$external_gene_name)
gene_BM[w2kp,]alias2Symbol function from
limma.a2s = rep(NA, length(assayed_genes))
for(i in 1:length(assayed_genes)){
gi = assayed_genes[i]
ai = alias2Symbol(gi)
if(length(ai) > 1){
print(gi)
print(ai)
}
a2s[i] = ai[1]
}
table(is.na(a2s))##
## FALSE TRUE
## 1739 261
##
## FALSE TRUE <NA>
## 21 1718 261
gene_info = data.table(sym_in_data = assayed_genes, sym_limma = a2s)
gene_info[sym_in_data != sym_limma,]## sym_in_data sym_limma
## 1: LRRC6 DNAAF11
## 2: C11orf49 CSTPP1
## 3: LINC00476 ERCC6L2-AS1
## 4: HNRNPA1P48 HNRNPA1L3
## 5: ZRANB2-AS2 ZRANB2-DT
## 6: LINC00271 AHI1-DT
## 7: RFX3-AS1 RFX3-DT
## 8: LINC00884 ATP13A3-DT
## 9: FBXO30-DT EPM2A-DT
## 10: LINC00894 EOLA2-DT
## 11: COX10-AS1 COX10-DT
## 12: LINC01184 SLC12A2-DT
## 13: FAM198B-AS1 GASK1B-AS1
## 14: C5orf17 LINC02899
## 15: C8orf37-AS1 CFAP418-AS1
## 16: LINC01146 HISLA
## 17: BTBD11 ABTB3
## 18: H2BU1 H2BC26
## 19: SKIV2L SKIC2
## 20: FAM155A NALF1
## 21: CTB-41I6.2 PIK3R5-DT
## sym_in_data sym_limma
gene_info[, gene_symbol := sym_in_data]
gene_info[which(sym_in_data != sym_limma & (gene_symbol != "MT-CO2")),
gene_symbol := sym_limma]
dim(gene_info)## [1] 2000 3
## sym_in_data sym_limma gene_symbol
## 1: CFTR CFTR CFTR
## 2: ICA1 ICA1 ICA1
## 3: PDK4 PDK4 PDK4
## 4: CALCR CALCR CALCR
## 5: ABCB4 ABCB4 ABCB4
## t1
## 1
## 2000
Gene set annotations (by gene symbols) were downloaded from MSigDB website.
gmtfile = list()
gmtfile[["reactome"]] = "../Annotation/c2.cp.reactome.v2023.2.Hs.symbols.gmt"
gmtfile[["go_bp"]] = "../Annotation/c5.go.bp.v2023.2.Hs.symbols.gmt"
pathways = list()
for(k1 in names(gmtfile)){
pathways[[k1]] = gmtPathways(gmtfile[[k1]])
}
names(pathways)## [1] "reactome" "go_bp"
## reactome go_bp
## 1692 7647
Filter gene sets for size between 10 and 500.
## $reactome
## 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%
## 5.0 7.0 9.0 12.0 17.0 23.0 31.0 44.0 71.8 120.9 1463.0
##
## $go_bp
## 0% 10% 20% 30% 40% 50% 60% 70% 80% 90% 100%
## 5.0 6.0 8.0 10.0 14.0 19.0 29.0 46.0 80.8 183.0 1966.0
## [1] 2000 3
## sym_in_data sym_limma gene_symbol
## 1: CFTR CFTR CFTR
## 2: ICA1 ICA1 ICA1
## [1] 36517 9
## gene_ids feature_is_filtered feature_name feature_reference
## 1: ENSG00000000003 FALSE TSPAN6 NCBITaxon:9606
## 2: ENSG00000000005 FALSE TNMD NCBITaxon:9606
## feature_biotype n_cells_by_counts mean_counts pct_dropout_by_counts
## 1: gene 76 0.002350 99.8100
## 2: gene 1 0.000025 99.9975
## total_counts
## 1: 94
## 2: 1
## [1] 2000
##
## TRUE
## 2000
##
## FALSE TRUE
## 34517 2000
gene_dat$selected = 0
gene_dat$selected[match(gene_info$sym_in_data, gene_dat$feature_name)] = 1
table(gene_dat$selected)##
## 0 1
## 34517 2000
## $`0`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 94.48 99.58 93.53 99.97 100.00
##
## $`1`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.6475 65.5944 84.3650 76.3053 93.6756 97.9950
gene_info = merge(gene_info, gene_dat, by.x="sym_in_data",
by.y="feature_name", all.x = TRUE, all.y = FALSE)
dim(gene_info)## [1] 2000 12
## sym_in_data sym_limma gene_symbol gene_ids feature_is_filtered
## 1: AATK AATK AATK ENSG00000181409 FALSE
## 2: ABAT ABAT ABAT ENSG00000183044 FALSE
## feature_reference feature_biotype n_cells_by_counts mean_counts
## 1: NCBITaxon:9606 gene 3157 0.091525
## 2: NCBITaxon:9606 gene 9074 0.368950
## pct_dropout_by_counts total_counts selected
## 1: 92.1075 3661 1
## 2: 77.3150 14758 1
max_n2kp = 10
goseq_res = NULL
for(k in 1:length(gene_sets)){
if(length(gene_sets[[k]]) < 10) { next }
print(k)
set_k = paste0("set_", k)
print(gene_sets[[k]])
genes = gene_info$sym_in_data %in% gene_sets[[k]]
names(genes) = gene_info$gene_symbol
table(genes)
pwf = nullp(genes, "hg38", "geneSymbol",
bias.data = 100 - gene_info$pct_dropout_by_counts)
for(k1 in names(pathways)){
p1 = pathways[[k1]]
res1 = goseq(pwf, "hg38", "geneSymbol",
gene2cat=goseq:::reversemapping(p1))
res1$FDR = p.adjust(res1$over_represented_pvalue, method="BH")
nD = sum(res1$FDR < 0.05)
if(nD > 0){
res1 = res1[order(res1$FDR),][1:min(nD, max_n2kp),]
res1$category = gsub("REACTOME_|GOBP_", "", res1$category)
res1$category = gsub("_", " ", res1$category)
res1$category = tolower(res1$category)
res1$category = substr(res1$category, start=1, stop=81)
goseq_res[[set_k]][[k1]] = res1
}
}
}## [1] 1
## [1] "MCTP2" "ADAMTS3" "GHRL" "HESX1"
## [5] "UCN" "WDR25" "EYS" "PAPSS2"
## [9] "CARNMT1-AS1" "MTCP1" "LINC01277" "LINC00271"
## [13] "LINC02884" "RPL34-DT" "RP11-195C7.1" "KCNMA1-AS1"
## [17] "RP11-544A12.8" "STARD4-AS1" "C5orf17" "LINC02196"
## [21] "MIR100HG" "RP1-71H24.1" "LINC02444" "RP11-58G13.1"
## [25] "RP11-354K4.2" "TRG-AS1" "RP11-237N2.1" "RP11-314E10.1"
## [1] 2
## [1] "MTERF1" "PIGC" "ADAMTS19" "ADGRG5"
## [5] "BEX3" "FLJ40194" "TMEM151B" "PCED1B"
## [9] "H2BC4" "H2BC18" "COL5A2" "ENTPD3-AS1"
## [13] "TEX41" "ZNF451-AS1" "RP4-568F9.6" "RP11-557H15.4"
## [17] "LINC02642" "BDNF-AS" "LINC02057" "AC008697.1"
## [21] "CTD-2537O9.1" "RP11-370I10.6" "H2AC19" "H2BC8"
## [25] "H2AC8" "RP11-495L19.1" "RP11-321P16.3" "RP11-399K19.1"
## [1] 3
## [1] "SIGLEC1" "ADGRD1" "F13A1" "CH25H"
## [5] "TMEM44" "BANK1" "ADAMTS15" "ODF3B"
## [9] "RP11-445F6.2" "LINC01515" "AC002539.1" "RP3-525N10.2"
## [13] "AC007682.1" "RP11-142A5.1" "LINC02211" "FAM66D"
## [17] "OVCH1-AS1" "CTD-2336H13.2" "LINC02073" "CERNA2"
## [21] "LINC02885" "RP11-38B6.1"
## [1] 4
## [1] "APBB1IP" "ACER3" "PLEK" "TBC1D14" "NPL" "ARMH4"
## [7] "GAREM1" "RASGEF1C" "AOPEP" "DISP1" "DEPTOR" "ATF7IP2"
## [13] "MARCHF3" "DLEU1" "SLC8A1" "SCFD2" "PRKN" "MBD5"
## [19] "HMGA1P4" "XACT" "LINC01094" "RNASET2" "ARHGAP6" "PADI2"
## [25] "FMN1"
## [1] 5
## [1] "CELSR3" "GALNT16" "LILRB5" "HS3ST2"
## [5] "NRG2" "JAML" "PLPP3" "RANBP3L"
## [9] "FSTL5" "STXBP6" "HPSE2" "LRRC3B"
## [13] "ADGRB1" "VMO1" "POTEG" "ZNF521"
## [17] "FAM153CP" "FAM229A" "RP11-47I22.3" "AF064858.8"
## [21] "TRHDE-AS1" "AC002066.1" "RP11-696N14.1" "CPEB2-DT"
## [25] "SELENOP" "RP11-79E3.2" "RP11-661A12.5" "RP11-429A20.3"
## [29] "RP11-396B14.2" "RP11-436D23.1"
## [1] 6
## [1] "MMP24" "C4orf19" "CYTL1" "ENO4"
## [5] "FAM177B" "CLEC9A" "INSYN2B" "ANKUB1"
## [9] "ELMO1-AS1" "TMLHE-AS1" "ENTPD1-AS1" "AC096558.1"
## [13] "SRGAP2-AS1" "AC037445.1" "RP11-528G1.2" "DIAPH2-AS1"
## [17] "LINC01141" "MEF2C-AS2" "LINC02742" "RP11-649G15.2"
## [21] "RP11-452H21.1" "CTD-2647E9.3" "RP11-815J21.4" "ABHD15-AS1"
## [25] "RP11-16C1.3" "RP11-793A3.2" "CTD-3149D2.4" "CTD-2282P23.2"
## [29] "DISC2"
## [1] 7
## [1] "NAV3" "LY96" "FGGY" "ZDHHC14" "KCNQ3"
## [6] "MIR99AHG" "MEF2C-AS1" "LINC02232" "ATP6V0D1-DT" "KCNQ2"
## [11] "CUX2" "FRMD4B" "WDR45B" "SLC2A5" "HSPA12A"
## [16] "RBFOX3" "MYO1D" "KCNQ5" "ZNF846" "ZNF407"
## [21] "LNCAROD"
## [1] 8
## [1] "TENT5A" "CYREN" "TMEM241" "RALGPS1"
## [5] "SLC49A4" "GASK1A" "PUS10" "GASK1B"
## [9] "ACYP2" "RNLS" "LIPE-AS1" "ALOX12-AS1"
## [13] "HDAC2-AS2" "COX10-AS1" "MIR4300HG" "RP11-10H3.1"
## [17] "C8orf37-AS1" "RP11-554D14.6" "LINC02328" "RP11-187O7.3"
## [21] "MIR222HG" "RP11-323F24.5" "RP1-111C20.3" "RP11-574F11.4"
## [25] "ATOH8" "KLF13"
## [1] 9
## [1] "IFT43" "ATP5F1E" "STAB1" "LCP2" "HDHD5" "SESN1" "DOCK3"
## [8] "BICRAL" "GTDC1" "AFDN" "PRAM1" "CCDC40" "NR6A1" "RSU1"
## [15] "SLC2A13" "SH3RF1" "ST3GAL2" "NEK10" "PRKG1" "AIDA" "GLDN"
## [22] "PARVB" "IARS1"
## [1] 10
## [1] "SNCAIP" "PITPNM2" "PROCR" "IL1A" "C3" "FGF13"
## [7] "STON2" "RNF157" "KCNMB1" "KCNMA1" "TSPAN7" "TAGAP"
## [13] "RIC3" "P2RY12" "XIST" "LINC01091" "MMP28" "RGCC"
## [19] "SH3BP5" "LPL" "KCNE1"
## [1] 12
## [1] "OPHN1" "CASS4" "RIPOR2" "PDE10A" "PLCL1"
## [6] "AHI1" "WDPCP" "UNC80" "IRAK1BP1" "TENM4"
## [11] "CACNA1D" "SMIM14" "ZNF608" "CAPS2" "FIGN"
## [16] "RTN4RL1" "COL25A1" "PRDX6-AS1" "AC011288.2" "LINC00894"
## [21] "GMDS-DT" "RP11-673E1.1" "RP11-160E2.6" "RP4-681L3.3" "HIVEP2"
## [26] "MCTP1"
## [1] 13
## [1] "SLC1A3" "RAPGEF1" "PTPRE" "SSH2" "FER" "MGAT5"
## [7] "FMNL2" "CDC42SE2" "RUNX1" "CLASP2" "OXR1" "LDLRAD4"
## [13] "GPHN" "BNC2" "CHD2" "PACS1" "PCDH9" "PDE4B"
## [19] "NEAT1"
## [1] 14
## [1] "NIPAL2" "CPED1" "GPAT3" "SESN3"
## [5] "ARHGAP42" "CYSLTR1" "RAB39A" "CATSPERE"
## [9] "CDIN1" "SDHAF3" "MAMDC2-AS1" "FIRRE"
## [13] "CTA-292E10.6" "RP11-37B2.1" "RP11-630C16.2" "RP11-946L16.1"
## [17] "MIR223HG" "SNX25" "OGFRL1" "SEM1"
## [21] "RASGEF1B" "C17orf67" "CLEC5A"
## [1] 15
## [1] "SLC5A9" "RAB38" "CFAP74" "TCERG1L"
## [5] "SPDYE2" "RP11-165F24.3" "RP11-526K17.2" "HCG22"
## [9] "LINC00960" "RP11-712B9.2" "RP11-381K20.2" "RP11-39E3.3"
## [13] "RP11-434H14.1" "RP11-493L12.3" "RP11-317G6.1" "PRKCA-AS1"
## [17] "H2BC7" "RP11-622O11.6" "COL5A3" "SLC26A7"
## [21] "SNED1" "HTR1E" "DIRC3" "DARS1-AS1"
## [25] "CYP1B1-AS1" "AC092835.2" "MIR155HG" "RP11-384C4.7"
## [29] "LINC01138"
## [1] 16
## [1] "SYNDIG1" "FAM149A" "SPATA6" "C1QC"
## [5] "MIR31HG" "LINC01357" "FTCDNL1" "RP11-162D16.2"
## [9] "RP11-120D5.1" "LINC01684" "CEACAM16-AS1" "LINC00907"
## [13] "KCNQ1OT1" "RP11-74J13.8" "EIF1B-AS1" "RP11-437L7.2"
## [17] "RP11-685G11.1" "APBB3" "DARS1" "COL5A1"
## [21] "MFSD4B" "H2AC6" "ZC3H6" "CTD-2643I7.5"
## [25] "RP11-707P17.1" "RP11-452F19.4"
## [1] 17
## [1] "ZNF532" "ARHGEF18"
## [3] "PLAGL1" "FLVCR2"
## [5] "PUDP" "PCBD2"
## [7] "ARHGAP20" "SAMSN1"
## [9] "DENND2B" "CDYL2"
## [11] "SPTLC3" "MICOS10"
## [13] "PGBD5" "B3GLCT"
## [15] "TMSB4X" "LINC00630"
## [17] "RP11-535M15.1" "SLC16A1-AS1"
## [19] "LMCD1-AS1" "PVT1"
## [21] "LIX1-AS1" "LINC01605_ENSG00000253161"
## [23] "RP11-588H23.3" "LINC01146"
## [25] "RP11-281A20.2" "RP11-692P14.1"
## [27] "RP11-111G13.1"
## [1] 18
## [1] "PRKCQ" "PKD2L2" "ANGPT2" "MAP2K6" "NEK11" "PTPRB"
## [7] "FMO5" "PDE5A" "EFCAB11" "MED12L" "MTHFD2L" "TMEM52B"
## [13] "SLC35G2" "KCNMB2" "LTC4S" "KIF9-AS1" "CNTN1" "HGF"
## [19] "RASGRP2" "EMILIN2" "CRIM1" "CAMK4" "KCND3" "C11orf80"
## [25] "PPM1E" "NELL2"
## [1] 19
## [1] "LRRC7" "ARHGAP10" "RAPGEF4" "FGF14" "ADAM23"
## [6] "ATP8A2" "PDZD2" "STXBP5L" "PAM" "TENM2"
## [11] "ADGRL3" "NCAM2" "CSMD3" "NLGN1" "CTNND2"
## [16] "FUT9" "CADM2" "RIMS2" "NRXN1" "FLRT2"
## [21] "NRG3" "TAFA2" "LINC00632" "SNHG14" "RP11-384F7.2"
## [26] "IQCJ-SCHIP1"
## [1] 20
## [1] "SGCE" "C11orf65" "RP11-321C24.1" "ARHGAP26-AS1"
## [5] "ALG13-AS1" "RP11-353M9.1" "MYCBP2-AS1" "CCDC200"
## [9] "AC018890.6" "ZBTB20-AS5" "CTD-2015H6.3" "RP11-268P4.5"
## [13] "CTD-3239E11.2" "RP11-238K6.1" "RP11-513G19.1" "RP11-571M6.7"
## [17] "RP11-136F16.1" "RP11-713N11.4" "USP3-AS1" "RP11-16B9.1"
## [21] "RP11-106M3.3" "RNF213-AS1" "RP11-737O24.2" "RP11-53B2.3"
## [25] "RP4-769N13.7" "RP11-793H13.14" "RP11-458A7.1" "RP11-1146N6.3"
## [29] "RP11-685G9.5" "CTD-2021K4.2"
## [1] 21
## [1] "PER3" "CYFIP2" "DHRS9" "RPS6KA5" "STMN2" "ABCG2"
## [7] "SLC22A23" "PTPN14" "CCDC171" "PLEKHA7" "GAP43" "SVIL"
## [13] "LINC01278" "FOSL2" "SLC16A10" "CYTIP" "HIP1" "PARP8"
## [19] "SRSF12" "AGAP1" "FAM110B" "ZFPM2"
## [1] 22
## [1] "ANGPTL1" "ITGA9" "AGBL3" "LINC00539"
## [5] "AC017101.10" "RP1-225E12.2" "COA6-AS1" "AC083884.8"
## [9] "RP11-666F17.1" "DENND6A-AS1" "RP11-692D12.1" "RP11-679C8.2"
## [13] "RP11-703M24.5" "RP11-7F3.1" "AC009166.9" "ZFY"
## [17] "PXDNL" "RNF150" "LINC00910" "H1-0"
## [21] "KLF3-AS1" "LINC02646" "RP11-351A11.1" "HIF1A-AS3"
## [25] "RP11-322M13.1" "CTD-2285E13.1" "RP11-404C6.6"
## [1] 23
## [1] "NEURL2" "LILRB2"
## [3] "PEBP4" "IGSF11"
## [5] "ADGRA3" "LINC00467"
## [7] "SLC9B1" "H2AC20"
## [9] "FSIP2" "LINC01285"
## [11] "ATP6AP1L" "AC003090.1"
## [13] "RMDN2-AS1" "LINC01376"
## [15] "MEIKIN" "RP11-246A10.1"
## [17] "ALG1L9P_ENSG00000248671" "AC006160.5"
## [19] "RP11-1H15.2" "RP11-511B23.2"
## [21] "RP11-638I2.8" "LINC00639"
## [23] "RP11-221G19.1" "RP1-18C9.3"
## [25] "RP11-21M24.6" "RP11-65F13.4"
## [27] "RP11-56I23.2" "AMER2"
## [29] "LINC00970"
## [1] 24
## [1] "LINC00996" "SLC26A3"
## [3] "HELLS" "CHI3L1"
## [5] "LYPD5" "TNFRSF11B"
## [7] "LINC02397" "ARL17B"
## [9] "MYO16-AS1" "RP3-467L1.4"
## [11] "RP11-657O9.1" "RP11-13N12.1"
## [13] "MTRNR2L1" "RP11-81K2.1"
## [15] "SCARNA2_ENSG00000270066" "LINC00551"
## [17] "CH507-513H4.1"
## [1] 25
## [1] "PCSK1N" "IER3" "ARL5C" "AZIN2"
## [5] "NLGN4Y" "MIR4435-2HG" "GLIPR1L1" "TMEM72-AS1"
## [9] "RP1-30E17.2" "SLC8A1-AS1" "MIR646HG" "KCNMB2-AS1"
## [13] "ARHGAP8" "STX18-AS1" "ANK2-AS1" "RP11-281P23.2"
## [17] "ZFPM2-AS1" "MAILR" "AP000487.6" "RP11-516C1.1"
## [21] "LINC00621" "RP11-399K21.11" "LINC02712" "RP11-305E17.8"
## [25] "RP11-19D22.1" "RP11-264E23.4" "RP11-555K12.4" "PCDH11Y"
## [1] 26
## [1] "PEX7" "RAB40B" "ARAP2" "ABCA7"
## [5] "FAM107B" "SLC9A7" "RNMT" "IL4I1"
## [9] "DCUN1D4" "ZC4H2" "BTG1" "ADCY3"
## [13] "FAM117B" "COLEC12" "CCDC107" "FRMD5"
## [17] "RBM44" "ARL4C" "SYCP2" "H2BU1"
## [21] "CR1L" "CD247" "DENND1B" "AS3MT"
## [25] "USP2-AS1" "RP11-368L12.1"
## [1] 27
## [1] "P3H2" "AKAP7" "ACVR2A" "PLAU" "SMAD6"
## [6] "CCDC102B" "PRMT9" "SLC9A9" "KCNIP1" "DYNC2H1"
## [11] "HLA-DRA" "CYTH1" "AGAP3" "CSGALNACT1" "EVA1C"
## [1] 28
## [1] "RPL3" "RPL18A" "RPL19" "RPL34" "RPL24"
## [6] "LDAH" "RPS24" "RPS2" "RPS11" "RPL11"
## [11] "RPS27A" "RPL32" "RAB28" "RPL26" "RPL38"
## [16] "RPLP2" "RPS23" "RPL12" "RPL23A" "ZNF433-AS1"
## [21] "ADARB1"
## [1] 29
## [1] "MAP3K20" "ARRDC2" "CDH26" "KIAA0513"
## [5] "CCNG2" "PGGHG" "TIAM2" "AMN1"
## [9] "CFAP161" "ANKRD30BL" "MARCHF8" "PDZRN4"
## [13] "CXXC5" "LINC00862" "RP5-864K19.4" "SGO1-AS1"
## [17] "RP11-147G16.1" "RP11-154D17.1" "RP11-305L7.3" "SLC5A4-AS1"
## [21] "LINC01322" "RP11-20D14.3" "FMNL1-DT" "HCG17"
## [25] "RP11-154H23.4" "AC003099.2"
## [1] 30
## [1] "ST6GAL1" "FRMD4A" "SFMBT2" "DENND1A" "CHSY1" "DENND4C"
## [7] "TLR2" "SSBP2" "ARHGAP18" "JAK1" "RBM47" "AXL"
## [13] "TANC2" "TLR5"
## [1] 31
## [1] "PLPP1" "CARMIL1" "NME3" "PLGRKT" "TMEM156"
## [6] "MMS22L" "TMEM144" "FILIP1L" "SRGAP3" "LINC01356"
## [11] "PTCHD4" "ST3GAL1" "PDE8A" "PILRA" "RGS1"
## [16] "GRK3" "SRGN" "SEC14L1" "TMEM65" "ROBO1"
## [21] "CYRIA" "MIR181A1HG" "IFITM10"
## [1] 32
## [1] "TTC7A" "HAMP" "PIGL" "BTG2"
## [5] "LIPC" "MACROD2" "TTTY14" "DLEU7"
## [9] "LINC02649" "AC007879.5" "RP11-202G18.1" "CCDC26"
## [13] "RFX3-AS1" "LINC00884" "RNF217-AS1" "RP11-360F5.1"
## [17] "LINC02762" "CTC-575N7.1" "RP11-624C23.1" "RP11-463D19.1"
## [21] "SMIM35" "RP11-196H14.2" "RP11-323I15.5" "COPG2IT1"
## [1] 33
## [1] "KIF26B" "ADAM22" "PDE4A" "CELF4" "MOB3B" "TGFBI"
## [7] "PPFIA2" "CACNA1B" "ARID5B" "CACNA1C" "CACNA2D1" "FMN2"
## [13] "SORCS3" "SYN2" "GRM5" "GRIN1" "CADM1" "SYN3"
## [19] "MYT1L" "DPYD" "RYR2"
## [1] 34
## [1] "CALCR" "MBNL3" "ADGRG6" "PID1"
## [5] "NMNAT3" "BEAN1" "TMC7" "LINC00476"
## [9] "ARSJ" "SCOC-AS1" "AC105760.2" "ZRANB2-AS2"
## [13] "RP5-1198O20.4" "ITGA9-AS1" "RP11-634B7.4" "RORA-AS1"
## [17] "MIR3142HG" "LINC00506" "RP11-622I12.1" "RP11-1H15.4"
## [21] "RP11-6G22.1" "MAMLD1" "SPOCK2" "TBC1D10C"
## [25] "LINC01435" "RP1-179N16.6" "AGBL1"
## [1] 35
## [1] "CHD5" "OLFM1" "CEMIP2" "CBLN2"
## [5] "CDH18" "CNTN5" "NPAS3" "ME3"
## [9] "AK5" "PALM2AKAP2" "CLSTN2" "ABCG1"
## [13] "CAMK2N1" "LRFN5" "NEGR1" "RESF1"
## [17] "H1-10" "LSAMP" "PTPRT" "CACNA1E"
## [21] "TOX" "FAM155A" "MEG3" "RP11-191L9.4"
## [25] "MIAT" "SHISA9" "RP11-17A1.3" "RP11-259O2.1"
## [29] "RP11-111A21.1"
## [1] 36
## [1] "ATP8B1" "IL7" "SSPN" "SHOC1"
## [5] "PLD4" "AC002463.3" "HNRNPA1P48" "AC105461.1"
## [9] "RP11-142M10.2" "ARHGAP15-AS1" "RP5-1132H15.1" "LANCL1-AS1"
## [13] "RP11-575L7.8" "FBXO30-DT" "CPB2-AS1" "RP1-117O3.2"
## [17] "KDM4A-AS1" "BCL2L1-AS1" "RP11-544A12.4" "LRIG2-DT"
## [21] "RP11-775D22.3" "RHOQ-AS1" "RP11-506H20.1" "LMO7-AS1"
## [25] "RP11-66H6.3" "RP11-126O1.4" "RP1-256G22.2" "RP11-261A24.1"
## [29] "RP11-168F24.3" "RP11-13J12.3"
## [1] 37
## [1] "XKR6" "SKA2" "KIAA0825" "KATNAL1" "PRKAG2" "CLIP4"
## [7] "MXI1" "PATJ" "CLVS2" "ST18" "PITPNC1" "RASA2"
## [13] "ZFYVE28" "EDIL3" "DLGAP1" "JAKMIP2" "SAMD12" "CSMD1"
## [19] "OPCML" "RALYL" "KCNIP4" "SLIT1" "KAZN" "BASP1-AS1"
## [25] "SATB1-AS1"
## [1] 38
## [1] "ABCB4" "OMD" "ACKR4" "ADAMTS17"
## [5] "ABCA8" "GLT1D1" "FOLR2" "MT1E"
## [9] "RAB37" "MCMDC2" "RP11-131L23.1" "EDNRB-AS1"
## [13] "SMCR5" "AC073115.7" "AC092431.3" "KIF5C-AS1"
## [17] "AC073115.6" "RP5-1101C3.1" "CTB-161M19.4" "MTRNR2L8"
## [21] "RP11-867G2.8" "PRANCR" "PSMD7-DT" "TCF4-AS1"
## [25] "RP11-121C2.3" "RP11-413N10.3" "RP11-275G7.2" "RP11-4F5.3"
## [29] "GADD45B"
## [1] 39
## [1] "NEXMIF" "CRYM" "LRRC6" "CALY"
## [5] "DUOX1" "HERC6" "RNF180" "DOK6"
## [9] "TSBP1-AS1" "RP13-188A5.1" "PCDH9-AS2" "IPO9-AS1"
## [13] "RAP2C-AS1" "RP11-123B3.2" "RP11-120A1.1" "RP11-541P9.3"
## [17] "ZNF10" "RP11-493L12.4" "LINC02316" "CEROX1"
## [21] "ZNF528-AS1" "LINC02666" "RP11-314L11.1" "RP1-80B9.4"
## [25] "RP4-545L17.11" "RP11-320L2.1" "RP11-617F9.2" "LINC01762"
## [29] "SAP30-DT"
## [1] 40
## [1] "ZNF804B" "SYT7" "ADGRL1" "IGSF9B" "NALCN"
## [6] "KCNT1" "ARFGEF3" "DNAH6" "CSMD2" "ATP8A1"
## [11] "PCSK2" "GPNMB" "ZNF365" "BTBD11" "ZNF385D"
## [16] "SPOCK1" "FBXO32" "GABRB1" "PRICKLE2" "ARHGEF3"
## [21] "ADGRV1" "SRRM3" "TMTC2" "GPC5" "KCTD16"
## [26] "RORB" "CTC-340A15.2" "LINC01414" "CTC-535M15.2"
for(n1 in names(goseq_res)){
k = as.numeric(gsub("set_", "", n1))
print(n1)
print(gene_sets[[k]])
print(goseq_res[[n1]])
}## [1] "set_2"
## [1] "MTERF1" "PIGC" "ADAMTS19" "ADGRG5"
## [5] "BEX3" "FLJ40194" "TMEM151B" "PCED1B"
## [9] "H2BC4" "H2BC18" "COL5A2" "ENTPD3-AS1"
## [13] "TEX41" "ZNF451-AS1" "RP4-568F9.6" "RP11-557H15.4"
## [17] "LINC02642" "BDNF-AS" "LINC02057" "AC008697.1"
## [21] "CTD-2537O9.1" "RP11-370I10.6" "H2AC19" "H2BC8"
## [25] "H2AC8" "RP11-495L19.1" "RP11-321P16.3" "RP11-399K19.1"
## $reactome
## category
## 412 hats acetylate histones
## 415 hcmv late events
## 416 hdacs deacetylate histones
## 1164 ub specific processing proteases
## 413 hcmv early events
## 414 hcmv infection
## 147 chromatin modifying enzymes
## 226 deubiquitination
## 9 activated pkn1 stimulates transcription of ar androgen receptor regulated genes k
## 76 assembly of the orc complex at the origin of replication
## over_represented_pvalue under_represented_pvalue numDEInCat numInCat
## 412 2.965695e-08 1 5 9
## 415 5.778771e-08 1 5 11
## 416 6.316395e-08 1 5 11
## 1164 3.170673e-07 1 5 16
## 413 3.274704e-07 1 5 14
## 414 4.014105e-07 1 5 15
## 147 1.143854e-06 1 5 20
## 226 1.512997e-06 1 5 24
## 9 1.930756e-06 1 4 8
## 76 1.930756e-06 1 4 8
## FDR
## 412 2.488660e-05
## 415 2.488660e-05
## 416 2.488660e-05
## 1164 7.741401e-05
## 413 7.741401e-05
## 414 7.907787e-05
## 147 1.311299e-04
## 226 1.311299e-04
## 9 1.311299e-04
## 76 1.311299e-04
##
## [1] "set_7"
## [1] "NAV3" "LY96" "FGGY" "ZDHHC14" "KCNQ3"
## [6] "MIR99AHG" "MEF2C-AS1" "LINC02232" "ATP6V0D1-DT" "KCNQ2"
## [11] "CUX2" "FRMD4B" "WDR45B" "SLC2A5" "HSPA12A"
## [16] "RBFOX3" "MYO1D" "KCNQ5" "ZNF846" "ZNF407"
## [21] "LNCAROD"
## $reactome
## category over_represented_pvalue
## 1177 voltage gated potassium channels 1.791959e-05
## under_represented_pvalue numDEInCat numInCat FDR
## 1177 0.9999999 3 7 0.02118095
##
## [1] "set_28"
## [1] "RPL3" "RPL18A" "RPL19" "RPL34" "RPL24"
## [6] "LDAH" "RPS24" "RPS2" "RPS11" "RPL11"
## [11] "RPS27A" "RPL32" "RAB28" "RPL26" "RPL38"
## [16] "RPLP2" "RPS23" "RPL12" "RPL23A" "ZNF433-AS1"
## [21] "ADARB1"
## $reactome
## category over_represented_pvalue
## 138 cellular response to starvation 0
## 301 eukaryotic translation elongation 0
## 302 eukaryotic translation initiation 0
## 452 influenza infection 0
## 557 metabolism of amino acids and derivatives 0
## 640 nonsense mediated decay nmd 0
## 801 regulation of expression of slits and robos 0
## 850 response of eif2ak4 gcn2 to amino acid deficiency 0
## 901 rrna processing 0
## 927 selenoamino acid metabolism 0
## under_represented_pvalue numDEInCat numInCat FDR
## 138 1 17 26 0
## 301 1 17 23 0
## 302 1 17 23 0
## 452 1 17 27 0
## 557 1 17 41 0
## 640 1 17 23 0
## 801 1 17 27 0
## 850 1 17 25 0
## 901 1 17 25 0
## 927 1 17 26 0
##
## $go_bp
## category over_represented_pvalue
## 630 cytoplasmic translation 0.000000e+00
## 4283 ribosome biogenesis 0.000000e+00
## 4279 ribosomal large subunit biogenesis 5.300278e-10
## 4278 ribosomal large subunit assembly 4.261838e-08
## 4281 ribosomal small subunit biogenesis 2.070857e-07
## 4282 ribosome assembly 2.095572e-07
## 3144 protein rna complex organization 7.898271e-06
## 2221 non membrane bounded organelle assembly 5.073840e-05
## under_represented_pvalue numDEInCat numInCat FDR
## 630 1.0000000 17 23 0.000000e+00
## 4283 1.0000000 8 13 0.000000e+00
## 4279 1.0000000 5 5 8.339104e-07
## 4278 1.0000000 4 4 5.028969e-05
## 4281 1.0000000 4 5 1.648517e-04
## 4282 1.0000000 4 5 1.648517e-04
## 3144 0.9999999 4 10 5.325691e-03
## 2221 0.9999976 5 30 2.993565e-02
##
## [1] "set_33"
## [1] "KIF26B" "ADAM22" "PDE4A" "CELF4" "MOB3B" "TGFBI"
## [7] "PPFIA2" "CACNA1B" "ARID5B" "CACNA1C" "CACNA2D1" "FMN2"
## [13] "SORCS3" "SYN2" "GRM5" "GRIN1" "CADM1" "SYN3"
## [19] "MYT1L" "DPYD" "RYR2"
## $go_bp
## category over_represented_pvalue
## 280 calcium ion transport into cytosol 8.454614e-07
## under_represented_pvalue numDEInCat numInCat FDR
## 280 1 4 5 0.003990578
## used (Mb) gc trigger (Mb) limit (Mb) max used (Mb)
## Ncells 8955014 478.3 16112782 860.6 NA 16112782 860.6
## Vcells 16664170 127.2 31295066 238.8 65536 31295066 238.8
## R version 4.2.3 (2023-03-15)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Ventura 13.4.1
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] goseq_1.50.0 geneLenDataBase_1.34.0 BiasedUrn_2.0.10
## [4] fgsea_1.24.0 biomaRt_2.54.1 limma_3.54.2
## [7] tidyr_1.3.0 ggpubr_0.6.0 ggplot2_3.4.2
## [10] data.table_1.14.8
##
## loaded via a namespace (and not attached):
## [1] nlme_3.1-162 matrixStats_1.0.0
## [3] bitops_1.0-7 bit64_4.0.5
## [5] filelock_1.0.2 progress_1.2.2
## [7] httr_1.4.6 GenomeInfoDb_1.34.9
## [9] tools_4.2.3 backports_1.4.1
## [11] bslib_0.4.2 utf8_1.2.3
## [13] R6_2.5.1 mgcv_1.8-42
## [15] DBI_1.1.3 BiocGenerics_0.44.0
## [17] colorspace_2.1-0 withr_2.5.0
## [19] tidyselect_1.2.0 prettyunits_1.1.1
## [21] bit_4.0.5 curl_5.0.1
## [23] compiler_4.2.3 cli_3.6.1
## [25] Biobase_2.58.0 xml2_1.3.4
## [27] DelayedArray_0.24.0 rtracklayer_1.58.0
## [29] sass_0.4.5 scales_1.2.1
## [31] rappdirs_0.3.3 Rsamtools_2.14.0
## [33] stringr_1.5.0 digest_0.6.31
## [35] rmarkdown_2.21 XVector_0.38.0
## [37] pkgconfig_2.0.3 htmltools_0.5.5
## [39] MatrixGenerics_1.10.0 dbplyr_2.3.2
## [41] fastmap_1.1.1 rlang_1.1.0
## [43] rstudioapi_0.14 RSQLite_2.3.1
## [45] BiocIO_1.8.0 jquerylib_0.1.4
## [47] generics_0.1.3 jsonlite_1.8.4
## [49] BiocParallel_1.32.6 dplyr_1.1.2
## [51] car_3.1-2 RCurl_1.98-1.12
## [53] magrittr_2.0.3 GO.db_3.16.0
## [55] GenomeInfoDbData_1.2.9 Matrix_1.6-4
## [57] Rcpp_1.0.10 munsell_0.5.0
## [59] S4Vectors_0.36.2 fansi_1.0.4
## [61] abind_1.4-5 lifecycle_1.0.3
## [63] stringi_1.7.12 yaml_2.3.7
## [65] carData_3.0-5 SummarizedExperiment_1.28.0
## [67] zlibbioc_1.44.0 org.Hs.eg.db_3.16.0
## [69] BiocFileCache_2.6.1 grid_4.2.3
## [71] blob_1.2.4 parallel_4.2.3
## [73] crayon_1.5.2 lattice_0.20-45
## [75] splines_4.2.3 Biostrings_2.66.0
## [77] cowplot_1.1.1 GenomicFeatures_1.50.4
## [79] hms_1.1.3 KEGGREST_1.38.0
## [81] knitr_1.44 pillar_1.9.0
## [83] GenomicRanges_1.50.2 rjson_0.2.21
## [85] ggsignif_0.6.4 codetools_0.2-19
## [87] stats4_4.2.3 fastmatch_1.1-3
## [89] XML_3.99-0.14 glue_1.6.2
## [91] evaluate_0.20 png_0.1-8
## [93] vctrs_0.6.2 gtable_0.3.3
## [95] purrr_1.0.1 cachem_1.0.7
## [97] xfun_0.39 broom_1.0.4
## [99] restfulr_0.0.15 rstatix_0.7.2
## [101] tibble_3.2.1 GenomicAlignments_1.34.1
## [103] AnnotationDbi_1.60.2 memoise_2.0.1
## [105] IRanges_2.32.0